home *** CD-ROM | disk | FTP | other *** search
- /*
- HTML to text converter
- EvenMore FileIO plugin
- Author: Chris Perver
- Copyright (c) 2001
- */
-
-
- OPT MODULE, REG=5
-
-
- MODULE 'tools/ctype'
-
-
/*
  Private helpers for convhtml().  All of them are bounds checked: they never
  read at or beyond the given input limit and never write at or beyond the
  given output limit.
*/

-> Upper limit for output writes.  When converting in place the writer must
-> stay behind the reader, otherwise unread input would be overwritten.
PROC writelimit(inplace, rd, cap)
  IF inplace THEN RETURN Min(rd, cap)
ENDPROC cap

-> Store character c at dst[pos] if there is room; returns the next write index.
PROC putch(dst:PTR TO CHAR, pos, limit, c)
  IF pos < limit
    dst[pos] := c
    INC pos
  ENDIF
ENDPROC pos

-> Append the NUL-terminated string s at dst[pos].  All or nothing, so an
-> escape sequence is never emitted half-way.  Returns the next write index.
PROC putstr(dst:PTR TO CHAR, pos, limit, s:PTR TO CHAR)
  DEF n, k
  n := StrLen(s)
  IF (pos + n) <= limit
    FOR k := 0 TO n - 1 DO dst[pos + k] := s[k]
    pos := pos + n
  ENDIF
ENDPROC pos

-> Index of the first c in src[pos..limit-1], or limit if there is none.
PROC findch(src:PTR TO CHAR, pos, limit, c)
  WHILE pos < limit
    IF src[pos] = c THEN RETURN pos
    INC pos
  ENDWHILE
ENDPROC limit

-> Index of the first non-space character at or after pos (limit if none).
PROC skipspaces(src:PTR TO CHAR, pos, limit)
  WHILE pos < limit
    IF src[pos] <> " " THEN RETURN pos
    INC pos
  ENDWHILE
ENDPROC pos

-> Skip forward to just behind the next occurrence of endtag (given in upper
-> case, compared case-insensitively).  Returns limit if it never occurs.
PROC skippast(src:PTR TO CHAR, pos, limit, endtag:PTR TO CHAR)
  DEF buf[16]:STRING, n
  n := StrLen(endtag)
  WHILE pos < limit
    pos := findch(src, pos, limit, "<")
    IF (pos + n) > limit THEN RETURN limit
    UpperStr(StrCopy(buf, src + pos, n))
    IF StrCmp(buf, endtag) THEN RETURN pos + n
    INC pos
  ENDWHILE
ENDPROC limit

-> Copy the text between the first pair of double quotes found in
-> src[start..stop-1] to dst[pos].  Copying ends at the closing quote, at stop
-> or when the output limit is reached.
-> Returns the next write index and the number of characters copied.
PROC copyquoted(src:PTR TO CHAR, start, stop, dst:PTR TO CHAR, pos, limit)
  DEF first, going = TRUE
  first := pos
  start := findch(src, start, stop, $22) + 1
  WHILE going
    IF start >= stop
      going := FALSE
    ELSEIF src[start] = $22
      going := FALSE
    ELSEIF pos >= limit
      going := FALSE
    ELSE
      dst[pos] := src[start]
      INC pos
      INC start
    ENDIF
  ENDWHILE
ENDPROC pos, pos - first

-> Look backwards from buf[pos] for whitespace at which the current line can
-> be broken.  The search covers at most span bytes, never goes below index
-> low and gives up at a line break (the line is then one unbreakable word).
-> Returns the index of the whitespace, or -1.
PROC breakpoint(buf:PTR TO CHAR, pos, low, span)
  DEF k
  k := pos
  WHILE (k >= low) AND ((pos - k) < span)
    IF (buf[k] = "\n") OR (buf[k] = "\b") THEN RETURN -1
    IF isspace(buf[k]) THEN RETURN k
    DEC k
  ENDWHILE
ENDPROC -1

/*
  convhtml - strip HTML markup from a buffer, producing text with ANSI
  style sequences.

  memadr   buffer holding the HTML
  begin    index at which both reading and writing start
  lenadr   index just behind the last input byte
  mem2     output buffer; NIL (default) converts in place inside memadr
  lenadr2  index just behind the last usable output byte (taken from lenadr
           when mem2 is NIL)

  Returns the output buffer and the index just behind the last byte written.
  A "\n" is stored at that index as well, so the output buffer needs room for
  one byte at index lenadr2.

  Entities &nbsp; &amp; &quot; &lt; &gt; are decoded, other entities are
  dropped.  Recognised tags are removed and partly replaced by line breaks,
  bullets or escape sequences.  A tag that is not recognised only loses its
  "<"; the remainder is passed through as text.  Tag names are matched by
  prefix.  Outside <PRE>, line breaks and runs of spaces collapse to one
  space and lines are wrapped at 80 columns.

  Output that does not fit (or, in place, would overtake the read position)
  is left out instead of being written outside the buffer.

  NOTE(review): <H1>..<H6> switch bold on but the closing tag does not switch
  it off again - confirm whether the viewer resets styles by itself.
*/
EXPORT PROC convhtml(memadr:PTR TO CHAR, begin, lenadr, mem2 = NIL:PTR TO CHAR, lenadr2 = NIL)
  DEF html[256]:STRING
  DEF rd, wr, lim, tagend, semi, j, c, prev
  DEF wordwrap = 80, entwin = 10, cchar = 0
  DEF pre = FALSE, iscomm = FALSE, inplace

  IF mem2 = NIL
    mem2 := memadr
    lenadr2 := lenadr
  ENDIF
  inplace := (mem2 = memadr)
  rd := begin
  wr := begin

  -> Invariant for in-place conversion: wr <= rd.
  WHILE (rd < lenadr) AND (wr < lenadr2)

    SELECT 256 OF memadr[rd]

    -> ENTITY
    CASE "&"
      -> Entity names are short, so only look entwin bytes ahead for the
      -> ";".  Without one the "&" is ordinary text.
      j := Min(rd + entwin + 1, lenadr)
      semi := findch(memadr, rd + 1, j, ";")
      IF semi >= j
        INC rd
        lim := writelimit(inplace, rd, lenadr2)
        wr := putch(mem2, wr, lim, "&")
        INC cchar
      ELSE
        UpperStr(StrCopy(html, memadr + rd + 1, semi - rd - 1))
        rd := semi + 1
        lim := writelimit(inplace, rd, lenadr2)
        c := 0
        IF InStr(html, 'NBSP') = 0
          c := " "
        ELSEIF InStr(html, 'AMP') = 0
          c := "&"
        ELSEIF InStr(html, 'QUOT') = 0
          c := $22
        ELSEIF InStr(html, 'LT') = 0
          c := "<"
        ELSEIF InStr(html, 'GT') = 0
          c := ">"
        ENDIF
        IF c
          wr := putch(mem2, wr, lim, c)
          INC cchar
        ENDIF
      ENDIF

    -> LF, CR
    CASE "\n", "\b"
      c := memadr[rd]
      INC rd
      lim := writelimit(inplace, rd, lenadr2)
      IF pre
        -> preformatted: keep the line break and the indentation behind it
        wr := putch(mem2, wr, lim, c)
        cchar := 0
      ELSE
        -> a line break counts as one space, but not directly behind a tag
        -> or another space
        prev := IF wr > 0 THEN mem2[wr - 1] ELSE 0
        IF (prev <> " ") AND (iscomm = FALSE)
          wr := putch(mem2, wr, lim, " ")
          INC cchar
        ENDIF
        rd := skipspaces(memadr, rd, lenadr)
      ENDIF

    -> SPACE
    CASE " "
      INC rd
      lim := writelimit(inplace, rd, lenadr2)
      IF pre
        wr := putch(mem2, wr, lim, " ")
        INC cchar
      ELSE
        -> collapse runs of spaces
        prev := IF wr > 0 THEN mem2[wr - 1] ELSE 0
        IF prev <> " "
          wr := putch(mem2, wr, lim, " ")
          INC cchar
        ENDIF
        rd := skipspaces(memadr, rd, lenadr)
      ENDIF

    -> TAG
    CASE "<"
      iscomm := TRUE
      INC rd                              -> rd = first character of the tag text
      tagend := findch(memadr, rd, lenadr, ">")

      -> A "<" without closing ">" is dropped; what follows is handled as text.
      IF tagend < lenadr
        INC tagend                        -> tagend = index behind the ">"
        UpperStr(StrCopy(html, memadr + rd, tagend - rd))
        lim := writelimit(inplace, tagend, lenadr2)

        SELECT 256 OF html[0]

        -> CLOSING TAG
        CASE "/"
          iscomm := FALSE
          rd := tagend
          c := html[1]
          IF html[2] = ">"
            -> single letter tags
            SELECT 256 OF c
            CASE "A"
              wr := putstr(mem2, wr, lim, '\e[24;31m')
            CASE "B"
              wr := putstr(mem2, wr, lim, '\e[21m')
            CASE "I"
              wr := putstr(mem2, wr, lim, '\e[23m')
            CASE "U"
              wr := putstr(mem2, wr, lim, '\e[24m')
            CASE "P"
              wr := putch(mem2, wr, lim, "\n")
              cchar := 0
            ENDSELECT
          ELSE
            SELECT 256 OF c
            CASE "A"
              IF InStr(html, '/ADDRESS') = 0 THEN wr := putstr(mem2, wr, lim, '\e[21;23m')
            CASE "C"
              IF InStr(html, '/CITE') = 0
                wr := putstr(mem2, wr, lim, '\e[23m')
              ELSEIF InStr(html, '/CENTER') = 0
                wr := putch(mem2, wr, lim, "\n")
                cchar := 0
              ENDIF
            CASE "D"
              IF InStr(html, '/DD') = 0 THEN cchar := 0
            CASE "H"
              -> headings only; E evaluates left to right, hence the brackets
              IF (html[2] >= "1") AND (html[2] <= "6")
                wr := putstr(mem2, wr, lim, '\n\n')
                cchar := 0
              ENDIF
            CASE "P"
              IF InStr(html, '/PRE') = 0
                wr := putch(mem2, wr, lim, "\n")
                cchar := 0
                pre := FALSE
              ELSEIF InStr(html, '/P') = 0
                wr := putch(mem2, wr, lim, "\n")
                cchar := 0
              ENDIF
            CASE "T"
              IF InStr(html, '/TD') = 0
                wr := putch(mem2, wr, lim, "\n")
                cchar := 0
              ENDIF
            CASE "S"
              IF InStr(html, '/STRONG') = 0 THEN wr := putstr(mem2, wr, lim, '\e[21m')
            CASE "V"
              IF InStr(html, '/VAR') = 0 THEN wr := putstr(mem2, wr, lim, '\e[23m')
            ENDSELECT
          ENDIF

        -> COMMENT, DOCTYPE
        CASE "!"
          rd := tagend

        -> ANCHOR, AREA, ADDRESS
        CASE "A"
          IF InStr(html, 'HREF') > 0
            rd := tagend
            wr := putstr(mem2, wr, lim, '\e[4;33m')
          ELSEIF InStr(html, 'NAME') > 0
            rd := tagend
          ELSEIF InStr(html, 'AREA') = 0
            rd := tagend
          ELSEIF InStr(html, 'ADDRESS') = 0
            wr := putstr(mem2, wr, lim, '\e[1;3m')
            rd := tagend
          ENDIF

        -> BODY, BASE, BREAK, everything else is taken as BOLD
        CASE "B"
          rd := tagend
          IF (InStr(html, 'BODY') = 0) OR (InStr(html, 'BASE') = 0)
            -> no output
          ELSEIF InStr(html, 'BR') = 0
            wr := putch(mem2, wr, lim, "\n")
            cchar := 0
          ELSE
            wr := putstr(mem2, wr, lim, '\e[1m')
          ENDIF

        -> CENTER, CODE, CITE
        CASE "C"
          IF (InStr(html, 'CENTER') = 0) OR (InStr(html, 'CODE') = 0)
            rd := tagend
          ELSEIF InStr(html, 'CITE') = 0
            rd := tagend
            wr := putstr(mem2, wr, lim, '\e[3m')
          ENDIF

        -> DEFINITION LISTS, DIV
        CASE "D"
          IF (InStr(html, 'DD') = 0) OR (InStr(html, 'DL') = 0) OR (InStr(html, 'DIV') = 0)
            wr := putch(mem2, wr, lim, "\n")
            cchar := 0
            rd := tagend
          ELSEIF InStr(html, 'DT') = 0
            rd := tagend
          ENDIF

        -> EMPHASIZE
        CASE "E"
          IF InStr(html, 'EM') = 0 THEN rd := tagend

        -> FONT, FRAME, FORM
        CASE "F"
          IF (InStr(html, 'FONT') = 0) OR (InStr(html, 'FRAME') = 0) OR (InStr(html, 'FORM') = 0)
            rd := tagend
          ENDIF

        -> HEADINGS, HTML, HEAD, HORIZONTAL RULE
        CASE "H"
          IF (html[1] >= "1") AND (html[1] <= "6")
            wr := putch(mem2, wr, lim, "\n")
            wr := putstr(mem2, wr, lim, '\e[1m')
            cchar := 0
            rd := tagend
          ELSEIF (InStr(html, 'HTML') = 0) OR (InStr(html, 'HEAD') = 0)
            rd := tagend
          ELSEIF InStr(html, 'HR') = 0
            wr := putstr(mem2, wr, lim, '\n\n')
            FOR j := 1 TO wordwrap DO wr := putch(mem2, wr, lim, "-")
            wr := putstr(mem2, wr, lim, '\n\n')
            cchar := 0
            rd := tagend
          ENDIF

        -> ITALIC, IMG, INPUT, IFRAME, ISINDEX, ILAYER
        CASE "I"
          IF html[1] = ">"
            wr := putstr(mem2, wr, lim, '\e[3m')
            rd := tagend
          ELSEIF InStr(html, 'IMG') = 0
            -> show the ALT text in square brackets
            wr := putch(mem2, wr, lim, "[")
            INC cchar
            j := InStr(html, 'ALT')
            IF j >= 0
              wr, j := copyquoted(memadr, rd + j + 1, tagend - 1, mem2, wr, lim)
              cchar := cchar + j
            ENDIF
            rd := tagend
            wr := putch(mem2, wr, lim, "]")
            INC cchar
          ELSEIF InStr(html, 'INPUT') = 0
            -> show the VALUE text, except for hidden fields
            wr := putstr(mem2, wr, lim, '\e[4;33m')
            IF InStr(html, 'HIDDEN') = -1
              j := InStr(html, 'VALUE')
              IF j >= 0
                wr, j := copyquoted(memadr, rd + j + 1, tagend - 1, mem2, wr, lim)
                cchar := cchar + j
              ENDIF
            ENDIF
            rd := tagend
            wr := putstr(mem2, wr, lim, '\e[24;31m')
          ELSEIF (InStr(html, 'IFRAME') = 0) OR (InStr(html, 'ISINDEX') = 0) OR (InStr(html, 'ILAYER') = 0)
            rd := tagend
          ENDIF

        -> LIST ITEM
        CASE "L"
          IF InStr(html, 'LI') = 0
            rd := tagend
            wr := putstr(mem2, wr, lim, '\no ')
            cchar := 2
          ENDIF

        -> META, MAP
        CASE "M"
          IF (InStr(html, 'META') = 0) OR (InStr(html, 'MAP') = 0) THEN rd := tagend

        -> NOFRAMES, NOBR, NOLAYER
        CASE "N"
          IF (InStr(html, 'NOFRAMES') = 0) OR (InStr(html, 'NOBR') = 0) OR (InStr(html, 'NOLAYER') = 0)
            rd := tagend
          ENDIF

        -> O: (namespace prefix), OL, OPTION
        CASE "O"
          IF (InStr(html, 'O:') = 0) OR (InStr(html, 'OL') = 0)
            rd := tagend
          ELSEIF InStr(html, 'OPTION') = 0
            wr := putstr(mem2, wr, lim, '\no ')
            cchar := 2
            rd := tagend
          ENDIF

        -> PRE, everything else is taken as PARAGRAPH
        CASE "P"
          rd := tagend
          IF InStr(html, 'PRE') = 0
            wr := putch(mem2, wr, lim, "\n")
            pre := TRUE
          ELSE
            wr := putstr(mem2, wr, lim, '\n\n')
          ENDIF
          cchar := 0

        -> STRONG, SMALL, SAMP, STYLE, SCRIPT, SPAN, SELECT
        CASE "S"
          IF InStr(html, 'STRONG') = 0
            wr := putstr(mem2, wr, lim, '\e[1m')
            rd := tagend
          ELSEIF (InStr(html, 'SMALL') = 0) OR (InStr(html, 'SAMP') = 0)
            rd := tagend
          ELSEIF InStr(html, 'STYLE') = 0
            -> drop the whole style sheet
            rd := skippast(memadr, tagend, lenadr, '</STYLE>')
          ELSEIF InStr(html, 'SCRIPT') = 0
            -> drop the whole script
            rd := skippast(memadr, tagend, lenadr, '</SCRIPT>')
          ELSEIF (InStr(html, 'SPAN') = 0) OR (InStr(html, 'SELECT') = 0)
            rd := tagend
          ENDIF

        -> TITLE, TEXTAREA, TT, TABLES
        CASE "T"
          IF InStr(html, 'TITLE') = 0
            -> drop the title text up to the next tag
            rd := findch(memadr, tagend, lenadr, "<")
          ELSEIF (InStr(html, 'TEXTAREA') = 0) OR (InStr(html, 'TT') = 0) OR (InStr(html, 'TABLE') = 0)
            rd := tagend
          ELSEIF InStr(html, 'TR') = 0
            wr := putch(mem2, wr, lim, "\n")
            cchar := 0
            rd := tagend
          ELSEIF (InStr(html, 'TD') = 0) OR (InStr(html, 'TH') = 0)
            wr := putch(mem2, wr, lim, "\t")
            INC cchar
            rd := tagend
          ELSEIF InStr(html, 'TBODY') = 0
            rd := tagend
          ENDIF

        -> UNDERLINE, UL
        CASE "U"
          IF html[1] = ">"
            wr := putstr(mem2, wr, lim, '\e[4m')
            rd := tagend
          ELSEIF InStr(html, 'UL') = 0
            rd := tagend
          ENDIF

        -> VAR
        CASE "V"
          IF InStr(html, 'VAR') = 0
            wr := putstr(mem2, wr, lim, '\e[3m')
            rd := tagend
          ENDIF

        ENDSELECT
      ENDIF

    -> PRINTABLE TEXT
    DEFAULT
      iscomm := FALSE

      -> wr < lenadr2 holds from the loop test, wr <= rd when in place
      mem2[wr] := memadr[rd]
      INC rd
      INC cchar

      -> Word wrap: turn the last whitespace of the line into a line break.
      -> Nothing is shifted, so the output does not grow.
      IF cchar >= wordwrap
        j := breakpoint(mem2, wr, begin, wordwrap)
        IF j >= 0
          mem2[j] := "\n"
          cchar := wr - j
        ENDIF
      ENDIF
      INC wr

    ENDSELECT

  ENDWHILE

  -> terminating line feed, stored behind the returned end index
  mem2[wr] := "\n"

ENDPROC mem2, wr
- -><
-
-